library(raster) # raster handling (needed for relief)
## Loading required package: sp
library(viridis) # viridis color scale
## Loading required package: viridisLite
library(cowplot) # stack ggplots
library(colorspace) #nicer color scales
##
## Attaching package: 'colorspace'
## The following object is masked from 'package:raster':
##
## RGB
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks raster::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks raster::select()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(rworldmap)
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
# set default figure parameters for knitr
knitr::opts_chunk$set(warning = FALSE, message = FALSE, progress = FALSE, verbose = FALSE)
wmap <- rworldmap::getMap(resolution = "low") %>%
st_as_sf()
ggplot(data = wmap) +
geom_sf(aes(fill = NAME)) +
theme_minimal() +
guides(fill = FALSE) # don't show legend
wmap_ant <- getMap()[-which(getMap()$ADMIN=='Antarctica'),] %>%
st_as_sf()
ggplot(data = wmap_ant) +
geom_sf(aes(fill = NAME)) +
theme_minimal() +
guides(fill = FALSE) # don't show legend
This example is taken directly from the SF reference guide and reproduced here so you can see it in context. ## Simple example: https://ggplot2.tidyverse.org/reference/ggsf.html
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
ggplot(nc) +
geom_sf(aes(fill = AREA))
# If not supplied, coord_sf() will take the CRS from the first layer
# and automatically transform all other layers to use that CRS. This
# ensures that all data will correctly line up
nc_3857 <- sf::st_transform(nc, 3857)
ggplot() +
geom_sf(data = nc) +
geom_sf(data = nc_3857, colour = "red", fill = NA)
# Unfortunately if you plot other types of feature you'll need to use
# show.legend to tell ggplot2 what type of legend to use
nc_3857$mid <- sf::st_centroid(nc_3857$geometry)
ggplot(nc_3857) +
geom_sf(colour = "white") +
geom_sf(aes(geometry = mid, size = AREA), show.legend = "point")
### Incorporating points (annotation)
# You can also use layers with x and y aesthetics. To have these interpreted
# as longitude/latitude you need to set the default CRS in coord_sf()
ggplot(nc_3857) +
geom_sf() +
annotate("point", x = -80, y = 35, colour = "red", size = 4) +
coord_sf(default_crs = sf::st_crs(4326))
### You can add labels (ahhh! too busy!)
# To add labels, use geom_sf_label().
ggplot(nc_3857) +
geom_sf(aes(fill = AREA)) +
geom_sf_label(aes(label = NAME))
# To add labels, use geom_sf_label().
ggplot(nc_3857[1:3,]) +
geom_sf(aes(fill = AREA)) +
geom_sf_label(aes(label = NAME))
Now, let’s incorporate our own data.
il_counties <- map_data("county", "illinois")
head(il_counties)
## long lat group order region subregion
## 1 -91.49563 40.21018 1 1 illinois adams
## 2 -90.91121 40.19299 1 2 illinois adams
## 3 -90.91121 40.19299 1 3 illinois adams
## 4 -90.91121 40.10704 1 4 illinois adams
## 5 -90.91121 39.83775 1 5 illinois adams
## 6 -90.91694 39.75754 1 6 illinois adams
ggplot(il_counties, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "grey50") +
coord_quickmap()
il_county <- tigris::counties(state = "illinois", cb = TRUE) %>%
st_as_sf()
ggplot(il_county) +
geom_sf(color = "blue", fill = "lightgray")
Data source: https://geo-csv.com/illinois/
illinois <- read_csv("data/illinois.csv")
ggplot(il_county) +
geom_sf(color = "blue", fill = "gray") +
geom_point(data = illinois,
aes(x = city_longitude, y = city_latitude),
colour = "darkblue", size = 0.1, alpha = 0.4) +
coord_sf()
# bring in county pop + cities
pop <- read_csv("data/dceo_county_pop.csv")
# merge by county
pop_all <- pop %>% filter(`Age Group` == "All" & Race == "All") %>% mutate(NAME = `State/County`)
county_pop <- full_join(il_county, pop_all, by="NAME")
# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE) +
coord_sf()
#try breaks
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_binned_sequential(
palette = "viridis",
rev = FALSE,
aesthetics = c("fill", "color"),
n.breaks = 3,
) +
labs(fill="2005 population") +
coord_sf()
#try logging
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
labs(fill="2005 population") +
coord_sf()
# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
labs(fill="2005 population") +
geom_point(data = illinois, # bringing in our city data
aes(x = city_longitude, y = city_latitude),
colour = "white", size = 0.01, alpha = 0.2) + # room to improve aesthetically
coord_sf()
ggsave("il_counties_cities.png")
Source: https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/ and https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/
I did not create any of this, just FYI! it’s a streamlined version from the source website and repo
Notice that I tried to remain faithful to the original code while
removing little pieces here and there. I also updated some of the
outdated code that had been deprecated. Because a) there are a lot of
pieces and b) the code calls dataframes, mutates elements of them and
then overwrites them, you need to pay attention to how you run the
various chunks. If you run a chunk once, running it a second time will
likely give you issues with the code. Finally, we haven’t discussed
%<>% in class, so I used the traditional pipe – if
you look at the source code for this, they use the assignment pipe that
allows you to replace a <- a %>% with
a %<>%.
# read cantonal borders
canton_geo <- read_sf("data/input/g2k15.shp")
# read country borders
country_geo <- read_sf("data/input/g2l15.shp")
# read lakes
lake_geo <- read_sf("data/input/g2s15.shp")
# read productive area (2324 municipalities)
municipality_prod_geo <- read_sf("data/input/gde-1-1-15.shp")
# read in raster of relief
relief <- raster("data/input/02-relief-ascii.asc") %>%
# hide relief outside of Switzerland by masking with country borders
mask(country_geo) %>%
as("SpatialPixelsDataFrame") %>%
as.data.frame() %>%
rename(value = `X02.relief.ascii`)
# clean up
rm(country_geo)
data <- read_csv("data/input/data.csv")
# create 3 buckets for gini
quantiles_gini <- data %>%
pull(gini) %>%
quantile(probs = seq(0, 1, length.out = 4))
# create 3 buckets for mean income
quantiles_mean <- data %>%
pull(mean) %>%
quantile(probs = seq(0, 1, length.out = 4))
Here the municipalities are put into the appropriate class corresponding to their average income and income (in-)equality.
# cut into groups defined above and join fill
municipality_prod_geo <- municipality_prod_geo %>%
left_join(data, by = c("BFS_ID" = "bfs_id"))
class(municipality_prod_geo)
## [1] "sf" "tbl_df" "tbl" "data.frame"
# pretyfiying labels
# define number of classes
no_classes <- 6
# extract quantiles
quantiles <- municipality_prod_geo %>%
pull(mean) %>%
quantile(probs = seq(0, 1, length.out = no_classes + 1)) %>%
as.vector() # to remove names of quantiles, so idx below is numeric
# here we create custom labels
labels <- imap_chr(quantiles, function(., idx){
return(paste0(round(quantiles[idx] / 1000, 0),
"k",
" – ",
round(quantiles[idx + 1] / 1000, 0),
"k"))
})
# we need to remove the last label
# because that would be something like "478k - NA"
labels <- labels[1:length(labels) - 1]
municipality_prod_geo <- municipality_prod_geo %>%
mutate(mean_quantiles = cut(mean,
breaks = quantiles,
labels = labels,
include.lowest = T))
ggplot(
# define main data source
data = municipality_prod_geo
) +
# raster comes as the first layer, municipalities on top
geom_raster(
data = relief, aes( x = x, y = y, alpha = value ) ) +
# use the "alpha hack"
scale_alpha(name = "", range = c(0.6, 0), guide = "none") +
geom_sf(
mapping = aes(
fill = mean_quantiles
),
color = "white",
size = 0.1
) +
# use the Viridis color scale
scale_fill_viridis(
option = "magma",
name = "Average\nincome in CHF",
alpha = 0.8, # make fill a bit brighter
begin = 0.1, # this option seems to be new (compared to 2016):
# with this we can truncate the
# color scale, so that extreme colors (very dark and very bright) are not
# used, which makes the map a bit more aesthetic
end = 0.9,
discrete = T, # discrete classes, thus guide_legend instead of _colorbar
direction = 1, # dark is lowest, yellow is highest
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = "top",
reverse = T # display highest income on top
)) +
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
)+
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015"))
Here, we’re going to set up the map and the legend. First, we will do the map.
# create color scale that encodes two variables
# red for gini and blue for mean income
# the special notation with gather is due to readibility reasons
bivariate_color_scale <- tibble(
"3 - 3" = "#3F2949", # high inequality, high income
"2 - 3" = "#435786",
"1 - 3" = "#4885C1", # low inequality, high income
"3 - 2" = "#77324C",
"2 - 2" = "#806A8A", # medium inequality, medium income
"1 - 2" = "#89A1C8",
"3 - 1" = "#AE3A4E", # high inequality, low income
"2 - 1" = "#BC7C8F",
"1 - 1" = "#CABED0" # low inequality, low income
) %>%
gather("group", "fill")
municipality_prod_geo <- municipality_prod_geo %>%
mutate(
gini_quantiles = cut(
gini,
breaks = quantiles_gini,
include.lowest = TRUE
),
mean_quantiles = cut(
mean,
breaks = quantiles_mean,
include.lowest = TRUE
),
# by pasting the factors together as numbers we match the groups defined
# in the tibble bivariate_color_scale
group = paste(
as.numeric(gini_quantiles), "-",
as.numeric(mean_quantiles)
)
) %>%
# we now join the actual hex values per "group"
# so each municipality knows its hex value based on the his gini and avg
# income value
left_join(bivariate_color_scale, by = "group")
map <- ggplot(
# use the same dataset as before
data = municipality_prod_geo
) +
# first: draw the relief
geom_raster(
data = relief,
aes(
x = x,
y = y,
alpha = value
)
) +
# use the "alpha hack" (as the "fill" aesthetic is already taken)
scale_alpha(name = "",
range = c(0.6, 0),
guide = F) + # suppress legend
# color municipalities according to their gini / income combination
geom_sf(
aes(
fill = fill
),
# use thin white stroke for municipalities
color = "white",
size = 0.1
) +
# as the sf object municipality_prod_geo has a column with name "fill" that
# contains the literal color as hex code for each municipality, we can use
# scale_fill_identity here
scale_fill_identity() +
# use thicker white stroke for cantons
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
) +
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015")) +
# add the theme
theme_map()
Now we will set up the legend.
# separate the groups
bivariate_color_scale <- bivariate_color_scale %>%
separate(group, into = c("gini", "mean"), sep = " - ") %>%
mutate(gini = as.integer(gini),
mean = as.integer(mean))
legend <- ggplot() +
geom_tile(
data = bivariate_color_scale,
mapping = aes(
x = gini,
y = mean,
fill = fill)
) +
scale_fill_identity() +
labs(x = "Higher inequality",
y = "Higher income") +
theme_map() +
# make font small enough
theme(
axis.title = element_text(size = 6)
) +
# quadratic tiles
coord_fixed()
ggdraw() +
draw_plot(map, 0, 0, 1, 1) +
draw_plot(legend, 0.05, 0.075, 0.2, 0.2)
These are going to look really good but wow are they fussy.
annotations <- tibble(
label = c(
"Grey areas mean\nlow income and\nlow inequality",
"Blue areas mean\nhigh income and\nlow inequality",
"Violet areas mean\nhigh income and\nhigh inequality",
"Red areas mean\nlow income and\nhigh inequality"
),
arrow_from = c(
"548921,232972", # grey
"771356,238335", # blue
"781136,125067", # violet
"616348,81869" # red
),
arrow_to = c(
"622435,206784", # grey
"712671,261998", # blue
"786229,149597", # violet
"602334,122674" # red
),
curvature = c(
0.2, # grey
0.1, # blue
-0.1, # violet
-0.2 # red
),
nudge = c(
"-3000,0", # grey
"3000,5000", # blue
"0,-5000", # violet
"3000,0" # red
),
just = c(
"1,0", # grey
"0,1", # blue
"0.5,1", # violet
"0,1" # red
)
) %>%
separate(arrow_from, into = c("x", "y")) %>%
separate(arrow_to, into = c("xend", "yend")) %>%
separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
separate(just, into = c("hjust", "vjust"), sep = "\\,")
Bringing in annotations to the map – notice we’re placing them after specifying them above.
map_a <- ggplot(
# use the same dataset as before
data = municipality_prod_geo
) +
# first: draw the relief
geom_raster(
data = relief,
aes(
x = x,
y = y,
alpha = value
)
) +
# use the "alpha hack" (as the "fill" aesthetic is already taken)
scale_alpha(name = "",
range = c(0.6, 0),
guide = F) + # suppress legend
# color municipalities according to their gini / income combination
geom_sf(
aes(
fill = fill
),
# use thin white stroke for municipalities
color = "white",
size = 0.1
) +
# as the sf object municipality_prod_geo has a column with name "fill" that
# contains the literal color as hex code for each municipality, we can use
# scale_fill_identity here
scale_fill_identity() +
# use thicker white stroke for cantons
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
) +
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015")) +
# add the theme
theme_map()
# add annotations one by one by walking over the annotations data frame
# this is necessary because we cannot define nudge_x, nudge_y and curvature
# in the aes in a data-driven way like as with x and y, for example
annotations %>%
pwalk(function(...) {
# collect all values in the row in a one-rowed data frame
current <- tibble(...)
# convert all columns from x to vjust to numeric
# as pwalk apparently turns everything into a character (why???)
current <- current %>%
mutate_at(vars(x:vjust), as.numeric)
# update the plot object with global assignment
map_a <<- map_a +
# for each annotation, add an arrow
geom_curve(
data = current,
aes(
x = x,
xend = xend,
y = y,
yend = yend
),
# that's the whole point of doing this loop:
curvature = current %>% pull(curvature),
size = 0.2,
arrow = arrow(
length = unit(0.005, "npc")
)
) +
# for each annotation, add a label
geom_text(
data = current,
aes(
x = x,
y = y,
label = label,
hjust = hjust,
vjust = vjust
),
# that's the whole point of doing this loop:
nudge_x = current %>% pull(nudge_x),
nudge_y = current %>% pull(nudge_y),
color = "black",
size = 3
)
})
Once we’ve saved the map and legend, we can bring them together in this plot.
ggdraw() +
draw_plot(map_a, 0, 0, 1, 1) +
draw_plot(legend, 0.06, 0.075, 0.2, 0.2)
ggsave("switzerland.png", dpi = 400) #saving for posterity